library(tidyverse)
library(readr)
library(fuzzyjoin)
library(car)
library(stringr)
library(maps)
library(data.table)
library(cdlTools)
setwd(rprojroot::find_root(rprojroot::is_rstudio_project))
getwd()
df <- fread("Intermediate Data/merged_cces_rollcall.csv")
df<-tibble(df)
link <- readxl::read_xlsx("Intermediate Data/policy_outcomes_and_notes.xlsx")
link<-link[!is.na(link$data_num),]


house_only <- link %>% subset(is.na(link$variable_name_senate) & !is.na(link$variable_name_house))
house_only<-house_only %>% mutate (ids = paste0(house_only$variable_name_house, house_only$data_year))

ids <- paste0(house_only$variable_name_house, house_only$data_year)
#sen_ids <- paste0(c(ids, ids), "_", c(rep(1, length(ids)), rep(2, length(ids))))
#missing_votes <- left_join(house_only, tibble(c(ids, ids), sen_ids), by=c("ids" = "c(ids, ids)"))
sen_ids <- paste0( ids, "_", c(rep(1, length(ids)), rep(2, length(ids))))
missing_votes <- left_join(house_only, tibble(c(ids, ids), sen_ids), by=c("ids" = "c(ids, ids)"))


mcs <- fread("Intermediate Data/HSall_members.csv")

#load data from github (https://github.com/unitedstates/congress-legislators?tab=readme-ov-file) that's converted from YAML to csv
#Data accessed 24 Jan, 2025. As Senators resign, the files they are contained in will change. 
mc <- fread("Intermediate Data/legislators-historical.csv", colClasses = "character")
mc_c <- fread("Intermediate Data/legislators-current.csv", colClasses = "character")

#subset to relevant fields + order in consistent order to allow reshape
mc2 <- mc %>%
  select(colnames(mc)[1:10],ends_with(c("/type", "/start", "/end", "/state", "/party"))) %>%
  select(colnames(mc)[1:10], starts_with("terms/")) %>%
  select(-contains("party_affiliations"))%>% 
  select(sort(names(.)))

mc_c2 <- mc_c %>%
  select(colnames(mc_c)[1:22],ends_with(c("/type", "/start", "/end", "/state", "/party"))) %>%
  select(colnames(mc_c)[1:22], starts_with("terms/")) %>%
  select(-contains("party_affiliations"))%>% 
  select(sort(names(.)))

#Merge current and former senators
mc3<-rbind(mc2, mc_c2, fill=T)

#reshape wide to long
x<-reshape(mc3, direction='long', 
        varying=c(colnames(mc3)[11:160]), 
        timevar='var',
        times=c('terms/0/', 'terms/1/', 'terms/2/', 'terms/3/',
                'terms/4/', 'terms/5/', 'terms/6/', 'terms/7/',
                'terms/8/', 'terms/9/', 'terms/10/', 'terms/11/',
                'terms/12/', 'terms/13/', 'terms/14/', 'terms/15/',
                'terms/16/', 'terms/17/', 'terms/18/', 'terms/19/',
                'terms/20/', 'terms/21/', 'terms/22/', 'terms/23/',
                'terms/24/', 'terms/25/', 'terms/26/', 'terms/27/',
                'terms/28/', 'terms/29/'),
        v.names=c('end', 'party', "start", "state", "type"),
        idvar='id/bioguide') %>% filter(type=="sen")


 
test<-left_join(house_only, x, by = join_by(between(house_vote_date, 
                                                 start,
                                                 end)))

test$dat_vote_ID <- paste0(test$variable_name_house, test$data_year)

#a <- left_join(df, test, by="dat_vote_ID")



################################
#I should be able to merge Senate info into the house data, then append relevant columns to the Senate data 
house <- fread("Intermediate Data/house_vote_level_data.csv")
house$state_m <- fips(house$state, to='Abbreviation')
missing_senate_votes<-left_join(test, house, by=c("dat_vote_ID"="vote","state" = "state_m"))
missing_senate_votes$sen <- missing_senate_votes$`id/icpsr`
fwrite(missing_senate_votes, "Intermediate Data/senate_vote_level_data_missingvotes.csv")
